# -*- coding: utf-8 -*-
"""
------------------------------------------------------------------------------
    File Name:  umichsi650_class
    Author   :  wanwei1029
    Date     :  2019/1/18
    Desc     :
------------------------------------------------------------------------------
"""
# from keras.layers.core import Activation, Dense, Dropout, SpatialDropout1D
# from keras.layers.embeddings import Embedding
# from keras.layers.recurrent import LSTM
# from keras.models import Sequential
# from keras.preprocessing import sequence
# from sklearn.model_selection import train_test_split
import collections
import matplotlib.pyplot as plt
import nltk
import numpy as np
import os

# Directory holding the data files; process_raw_data() joins it with
# "umichsi650_training.txt". NOTE(review): hard-coded absolute Windows path —
# adjust for the local machine.
BASE_DIR = "D:\\nas\\keras_action\\chapter06\\"


def process_raw_data():
    """
    Scan the raw training file and print basic corpus statistics.

    Reads ``umichsi650_training.txt`` from ``BASE_DIR``. Every non-blank line
    is expected to have the form ``<label><TAB><sentence>``. The sentence is
    lower-cased and tokenized with ``nltk.word_tokenize``.

    Prints, in order:
        * ``maxlen``   - the largest number of tokens found in one sentence,
        * the ``collections.Counter`` of token frequencies,
        * ``num_recs`` - the number of records processed.

    Blank lines are skipped. Only the first tab on a line separates the label
    from the sentence, so tabs inside the sentence do not break parsing.

    Raises:
        ValueError: if a non-blank line contains no tab separator.
    """
    maxlen = 0
    word_freqs = collections.Counter()
    num_recs = 0
    data_path = os.path.join(BASE_DIR, "umichsi650_training.txt")
    # Explicit encoding keeps the result independent of the platform default;
    # undecodable bytes are dropped instead of aborting the whole scan.
    with open(data_path, encoding="utf-8", errors="ignore") as ftrain:
        for line in ftrain:
            record = line.strip()
            if not record:
                # e.g. a trailing empty line at the end of the file
                continue
            # maxsplit=1: everything after the first tab is the sentence.
            _label, sentence = record.split("\t", 1)
            words = nltk.word_tokenize(sentence.lower())
            maxlen = max(maxlen, len(words))
            word_freqs.update(words)
            num_recs += 1
    print("maxlen={0}".format(maxlen))
    print(word_freqs)
    print("num_recs = {0}".format(num_recs))


def demo():
    """
    Demo entry point: calls process_raw_data(), which prints statistics
    about the training file.
    """
    process_raw_data()


if __name__ == '__main__':
    # Name of the routine to run when the file is executed as a script.
    test_method = "demo"
    # Map each selectable name to its callable; unknown names run nothing.
    runners = {"demo": demo}
    runner = runners.get(test_method)
    if runner is not None:
        runner()
